/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import mosdi.fa.AutomataUtils;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.IIDTextModel;
import mosdi.matching.Matcher;
import mosdi.paa.DAA;
import mosdi.paa.MinimizableDAA;
import mosdi.paa.MinimizedDAA;
import mosdi.paa.PAA;
import mosdi.paa.TextBasedPAA;
import mosdi.paa.apps.EmissionDifferenceDAA;
import mosdi.paa.apps.PatternMatchingDAA;
import mosdi.tools.PatternMatchingAnalysis;
import mosdi.util.Alphabet;
import mosdi.util.BitArray;
import mosdi.util.Log;
import mosdi.util.SequenceUtils;

public class CostDistributionSubcommand extends Subcommand {

	@Override
	public String description() {
		return "Computes the cost distribution for given algorithm and pattern.";
	}

	@Override
	public String name() {
		return "cost-distribution";
	}

	@Override
	public String usage() {
		return super.usage()+" [options] <algorithm> <pattern> <textlength>\n" +
		"\n" +
		"Constructs a PAA for given algorithm and pattern and computes the\n" +
		"cost distribution when reading a random text of given length.\n" +
		"\n" +
		"If two algorithms are given (comma-separated), the product automaton\n" +
		"is constructed and the joint cost-distribution is computed.\n" +
		"\n" +
		"Possible algorithms: "+ PatternMatchingAnalysis.allMatcherNames() +"\n" +
		"\n" +
		"Options:\n" +
		"  -A <alphabet>: default: ACGT\n" +
		"  -q <qgram-file>: file with q-gram frequencies specifying the text model\n" +
		"                   (default: uniform i.i.d. text model) A q-gram table can be created\n" +
		"                   by using \"mosdi-utils count-qgrams\".\n" +
		"  -m <maxCost>: maximal cost to be considered; affects size of PAA\n" +
		"                (default: m*(n-m+1), where m=patternlength and n=textlength).\n" +
		"                When two algorithms are compared, this parameter is interpreted\n" +
		"                as maximal difference between costs of both algorithms.";
	}

	@Override
	public int run(String[] args) {
		parseOptions(args, 3, "A:q:m:");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		String algorithmParameter = getStringArgument(0);
		String patternParameter = getStringArgument(1);
		int textLength = getIntArgument(2);

		// Options
		int maxCost = getIntOption("m",-1);
		int[] pattern = null;
		String qGramTableFilename = getStringOption("q", null);
		
		FiniteMemoryTextModel textModel = null;
		Alphabet alphabet = Alphabet.getDnaAlphabet();
		if (optionSet.has("A")) alphabet = new Alphabet((String)optionSet.valueOf("A"));
		try {
			pattern = alphabet.buildIndexArray(patternParameter);
		} catch (Exception e) {
			Log.errorln("Error processing pattern argument: "+e.getMessage());
			System.exit(1);
		}
		if (maxCost==-1) maxCost = pattern.length*(textLength-pattern.length+1); 
		if (qGramTableFilename!=null) {
			try {
				textModel = SequenceUtils.buildTextModelFromQGramFile(qGramTableFilename, alphabet);
			} catch (Exception e) {
				Log.errorln(e.getMessage());
				System.exit(1);
			}
		}
		else textModel = new IIDTextModel(alphabet.size());
		Log.printf(Log.Level.VERBOSE, "Text model has %d %s.\n", textModel.getStateCount(), textModel.getStateCount()==1?"state":"states");
		
		List<String> algorithmNames = new ArrayList<String>();
		StringTokenizer st = new StringTokenizer(algorithmParameter,",",false);
		while (st.hasMoreTokens()) algorithmNames.add(st.nextToken());
		DAA[] daas = new DAA[algorithmNames.size()];
		int i=0;
		Log.startTimer();
		for (String name : algorithmNames) {
			Matcher matcher = PatternMatchingAnalysis.getMatcher(name, alphabet.size(), pattern);
			if (matcher==null) {
				Log.errorln("Error: unknown algorithm: "+algorithmNames.get(0)+".");
				return 1;
			}
			Log.startTimer();
			MinimizableDAA daa = new PatternMatchingDAA(matcher.costScheme(), maxCost);
			Log.restartTimer("Constructing DAA for "+name);
			Log.printf(Log.Level.VERBOSE, "DAA for %s has %d states and %d values.\n", name, daa.getStateCount(), daa.getValueCount());
			MinimizedDAA minimizedDaa = new MinimizedDAA(daa);
			Log.stopTimer("Minimizing DAA");
			Log.printf(Log.Level.VERBOSE, "Minimal DAA for %s has %d states.\n", name, minimizedDaa.getStateCount());
			daas[i++] = minimizedDaa;
		}
		DAA daa = null;
		EmissionDifferenceDAA diffDaa = null;
		if (daas.length==1) {
			daa = daas[0];
		} else if (daas.length==2) {
			Log.startTimer();
			diffDaa = new EmissionDifferenceDAA(daas[0], daas[1], maxCost);
			Log.restartTimer("Constructing EmissionDifferenceDAA");
			BitArray reachability = AutomataUtils.findReachableStates(diffDaa);
			Log.restartTimer("Checking reachability of states of EmissionDifferenceDAA");
			Log.printf(Log.Level.VERBOSE, "EmissionDifferenceDAA has %d states (%d reachable) and %d values.\n", diffDaa.getStateCount(), reachability.numberOfOnes(), diffDaa.getValueCount());
			daa = new MinimizedDAA(diffDaa);
			Log.restartTimer("Minimizing EmissionDifferenceDAA");
			reachability = AutomataUtils.findReachableStates(daa);
			Log.stopTimer("Checking reachability of states of minimized EmissionDifferenceDAA");
			Log.printf(Log.Level.VERBOSE, "Minimized EmissionDifferenceDAA has %d states (%d reachable) and %d values.\n", daa.getStateCount(), reachability.numberOfOnes(), daa.getValueCount());
		} else {
			Log.errorln("Error: number of given algorithms must be 1 or 2.");
			return 1;
		}
		Log.startTimer();
		PAA paa = new TextBasedPAA(daa, textModel);
		Log.restartTimer("Constructing PAA");
		Log.printf(Log.Level.VERBOSE, "PAA has %d states. valuecount*statecount: %d\n", paa.getStateCount(), paa.getStateCount()*paa.getValueCount());
		double[] valueDistribution = paa.computeValueDistribution(textLength);
		Log.stopTimer("Computing PAA's state-value distribution");
		Log.stopTimer("Total time");
		Log.printf(Log.Level.STANDARD, "Algorithm: %s\n", algorithmParameter);
		Log.printf(Log.Level.STANDARD, "Analyzed pattern: %s\n", alphabet.buildString(pattern));
		Log.printf(Log.Level.STANDARD, "Text length: %d\n", textLength);
		StringBuffer sb = new StringBuffer();
		sb.append(">>cost_distribution>>");
		if (daas.length==1) {
			Log.printf(Log.Level.STANDARD, "Value range: 0 .. %d\n", maxCost);
			for (double p : valueDistribution) sb.append(" "+p);
		} else {
			Log.printf(Log.Level.STANDARD, "Probability for INVALID_VALUE: %f\n", valueDistribution[diffDaa.encodeValue(EmissionDifferenceDAA.INVALID_VALUE)]);
			Log.printf(Log.Level.STANDARD, "Value range: %d .. %d\n", -maxCost,maxCost);
			double probSmaller = 0.0;
			double probLarger = 0.0;
			for (int j=-maxCost; j<0; ++j) probSmaller+=valueDistribution[diffDaa.encodeValue(j)];
			for (int j=1; j<=maxCost; ++j) probLarger+=valueDistribution[diffDaa.encodeValue(j)];
			Log.printf(Log.Level.STANDARD, "Probabilities <0,=0,>0: %f %f %f\n", probSmaller,valueDistribution[diffDaa.encodeValue(0)],probLarger);
			for (int j=-maxCost; j<=maxCost; ++j) sb.append(" "+valueDistribution[diffDaa.encodeValue(j)]);
		}
		Log.println(Log.Level.STANDARD, sb.toString());
		return 0;
	}

}
